// 1. Dropping unnecessary observations and keeping only farm-level data.
// The per-household count of non-missing idc is taken before any rows are dropped below.
egen family_size = count(idc), by(hhcode)
// NOTE(review): Stata treats a missing age as larger than any number, so rows
// with missing age also satisfy age>=10 -- confirm that this is intended.
keep if prod_total_value != . & age >= 10
// Remove observations with zero production value or zero cultivated land.
drop if prod_total_value == 0 | land_culti_total == 0
// Run the occupation do-file before the commands below
// (presumably it supplies occupation, idc101 and idc195 -- TODO confirm).

// Per household: number of rows with s1bq06==5 and a non-missing occupation.
// Only the counted rows receive the value; the mean() that follows spreads it
// to every row of the same household.
egen family_worker = count(s1bq06) if occupation != . & s1bq06 == 5, by(hhcode)
egen family_worker_mean = mean(family_worker), by(hhcode)

// 2. Keep a person who works in agriculture or livestock, is a family worker
//    in agriculture, or is the head of household.
keep if s1aq02 == 1 | family_worker != . | idc == idc195 | idc == idc101

// Per-household participation counts (agriculture only, then agriculture or
// livestock), each followed by its household-wide copy.
egen farm_parti = count(idc) if idc == idc101, by(hhcode)
egen farm_live_parti = count(idc) if idc == idc195 | idc == idc101, by(hhcode)
egen farm_parti_mean = mean(farm_parti), by(hhcode)
egen farm_live_parti_mean = mean(farm_live_parti), by(hhcode)

// Disabled alternatives, kept for reference:
//keep if s1aq02==1
// keep if rel_head==1 | s1bq06==5 | s1bq06==6 | s1bq06==7 | s1bq06==8
//egen family_worker =count (idc) if s1bq06==5 & occupation!=. , by (hhcode)
// Farm-size class from cultivated area:
//   1 = (0, 2.5],  2 = (2.5, 12.5],  3 = above 12.5.
// A non-positive or missing area leaves the class missing.
gen farm_size_dummy = cond(land_culti_total > 12.5, 3, cond(land_culti_total > 2.5, 2, cond(land_culti_total > 0, 1, .))) if land_culti_total < .

// Construction of labour input cost variables (as reported, before imputation).
// Earlier approach, kept for reference:
//gen input_labor_perm1=input_labor_perm
//replace input_labor_perm1=0 if input_labor_perm==.
//gen input_labor_casual1=input_labor_casual
//replace input_labor_casual1=0 if input_labor_casual==.

// Row totals count a missing component as zero. rowtotal() is the documented
// name of the egen function that older code calls rsum().
egen labor_cost_ue = rowtotal(input_labor_casual input_labor_perm)
egen labor_cost_comb_ue = rowtotal(input_labor_casual input_labor_perm s10c1_189 s10c2_189)


// Imputing labour cost that is missing from the reported figures
// (family labour, own cultivators, sharecroppers, contract cultivators, others).
// NOTE(review): the earlier comment here cited 213 per day, but the code uses
// 273 for family workers -- confirm which rate is correct.
// Every rate is multiplied by 30*12 (presumably days per month x months).
//self_nonagri_labor
//paid_labor_cost

// Family labour: rate x number of family workers, spread to the whole household.
gen family_labor_cost1 = 273*30*12*family_worker if s1bq06==5 & occupation!=.
egen family_labor_cost = mean(family_labor_cost1), by(hhcode)
replace family_labor_cost = 0 if family_labor_cost==.

// Imputation cascade, farm sector only. Each step applies only when no casual
// or permanent labour cost is reported, the household has no family labour
// cost, and no earlier step of the cascade produced a value.
// "No family labour cost" is tested as family_labor_cost==0 because missing
// values were recoded to 0 just above; testing ==. at this point can never be
// true and would silently disable every imputation below.
// The rate is halved when occu_type101==2.
gen owncul_labor_cost1 = 343*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s1bq06==6
replace owncul_labor_cost1 = 343*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s1bq06==6 & occu_type101==2
egen owncul_labor_cost = mean(owncul_labor_cost1), by(hhcode)

gen sharecrop_labor_cost1 = 347*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & owncul_labor_cost==. & s1bq06==7
replace sharecrop_labor_cost1 = 347*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & owncul_labor_cost==. & s1bq06==7 & occu_type101==2
egen sharecrop_labor_cost = mean(sharecrop_labor_cost1), by(hhcode)

gen contcul_labor_cost1 = 351*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & owncul_labor_cost==. & sharecrop_labor_cost==. & s1bq06==8
replace contcul_labor_cost1 = 351*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & owncul_labor_cost==. & sharecrop_labor_cost==. & s1bq06==8 & occu_type101==2
egen contcul_labor_cost = mean(contcul_labor_cost1), by(hhcode)

gen otheroccup_labor_cost1 = 343*0.527*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & owncul_labor_cost==. & sharecrop_labor_cost==. & contcul_labor_cost==. & s1bq06!=6 & s1bq06!=7 & s1bq06!=8
egen otheroccup_labor_cost = mean(otheroccup_labor_cost1), by(hhcode)

// Same cascade for the combined farm + livestock sector: additionally requires
// that s10c1_189 and s10c2_189 are both missing.
// The half-rate line for own cultivators now tests s10c1_189==. & s10c2_189==.
// like every other line; the bare "s10c1_189 & s10c2_189" it used before is
// true for any nonzero value and contradicted the line above it.
gen owncul_labor_cost2 = 343*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & s1bq06==6
replace owncul_labor_cost2 = 343*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & s1bq06==6 & occu_type101==2
egen owncul_labor_cost_comb = mean(owncul_labor_cost2), by(hhcode)

gen sharecrop_labor_cost2 = 347*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & owncul_labor_cost==. & s1bq06==7
replace sharecrop_labor_cost2 = 347*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & owncul_labor_cost==. & s1bq06==7 & occu_type101==2
egen sharecrop_labor_cost_comb = mean(sharecrop_labor_cost2), by(hhcode)

gen contcul_labor_cost2 = 351*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & owncul_labor_cost==. & sharecrop_labor_cost==. & s1bq06==8
replace contcul_labor_cost2 = 351*30*12*0.5 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & owncul_labor_cost==. & sharecrop_labor_cost==. & s1bq06==8 & occu_type101==2
egen contcul_labor_cost_comb = mean(contcul_labor_cost2), by(hhcode)

gen otheroccup_labor_cost2 = 343*0.527*30*12 if input_labor_casual==. & input_labor_perm==. & family_labor_cost==0 & s10c1_189==. & s10c2_189==. & owncul_labor_cost==. & sharecrop_labor_cost==. & contcul_labor_cost==. & s1bq06!=6 & s1bq06!=7 & s1bq06!=8
egen otheroccup_labor_cost_comb = mean(otheroccup_labor_cost2), by(hhcode)



// Total labour cost (imputed plus reported) for the farm sector and for the
// combined farm + livestock sector. Row totals count missing components as zero.
egen labor_cost_total = rowtotal(family_labor_cost owncul_labor_cost sharecrop_labor_cost contcul_labor_cost otheroccup_labor_cost labor_cost_ue)
egen labor_cost_total_comb = rowtotal(family_labor_cost owncul_labor_cost_comb sharecrop_labor_cost_comb contcul_labor_cost_comb otheroccup_labor_cost_comb labor_cost_comb_ue)

// Family labour as a percentage of combined total labour cost, and ln(share + 1).
gen family_labor_cost_share = 100 * (family_labor_cost / labor_cost_total_comb)
gen family_labor_cost_share1 = 1 + family_labor_cost_share
gen lfamily_labor_cost_share = ln(family_labor_cost_share1)

// Imputed ("missing") labour cost only, for the farm and the combined sector.
egen labor_cost_mis = rowtotal(family_labor_cost owncul_labor_cost sharecrop_labor_cost contcul_labor_cost otheroccup_labor_cost)
egen labor_cost_mis_comb = rowtotal(family_labor_cost owncul_labor_cost_comb sharecrop_labor_cost_comb contcul_labor_cost_comb otheroccup_labor_cost_comb)
  
  //per acre estimates of total labor cost including missing
// Each labour cost measure divided by total cultivated land:
// reported only (farm, combined), then including imputed cost (farm, combined).
generate labor_pa_ue = labor_cost_ue / land_culti_total
generate labor_pa_comb_ue = labor_cost_comb_ue / land_culti_total
generate labor_costpa_total = labor_cost_total / land_culti_total
generate labor_costpa_total_comb = labor_cost_total_comb / land_culti_total

// Total input cost including the imputed labour cost, for the farm sector and
// the combined sector, plus the combined total without imputed labour (_ue).
// Row totals count missing components as zero.
egen input_cost_total = rowtotal(input_total labor_cost_mis)
egen input_cost_total_comb = rowtotal(input_total labor_cost_mis_comb s10c1_195 s10c2_195)
egen input_total_comb_ue = rowtotal(input_total s10c1_195 s10c2_195)

// Imputed labour cost as a percentage of combined total labour cost,
// and its mean within each farm-size class.
gen mis_act_lab_cost_ratio = 100 * (labor_cost_mis_comb / labor_cost_total_comb)
egen mis_act_lab_cost_ratio_size = mean(mis_act_lab_cost_ratio), by(farm_size_dummy)
// Getting the portion of income or time given to the second occupation (disabled):
//gen wagem_wages=s1bq15/ (wage_monthly*12)
//egen wage_ratio_mean= mean ( wagem_wages) if labor_cost_total==0

// Input cost divided by total cultivated land: reported only (farm, combined),
// then including imputed labour cost (farm, combined).
generate input_costpa_ue = input_total / land_culti_total
generate input_costpa_comb_ue = input_total_comb_ue / land_culti_total
generate input_costpa_total = input_cost_total / land_culti_total
generate input_costpa_total_comb = input_cost_total_comb / land_culti_total


// Combining livestock with agricultural production:
// recode system-missing (.) to zero in each livestock item.
foreach item in s10c1_165 s10c2_165 s10c3_165 s10c4_165 s10c5_165 s10c3_180 s10c1_195 s10c2_195 {
    replace `item' = 0 if `item' == .
}

// Disabled construction of prod_live, kept for reference (prod_live must
// already exist in the data for the commands below to run):
//gen prod_live= s10c1_165-s10c2_165+s10c3_165-s10c4_165-s10c5_165+s10c3_180

// A zero livestock value is set to missing. The row total below still counts
// it as zero, so only the per-acre livestock yield is affected.
replace prod_live = . if prod_live == 0
egen prod_comb = rowtotal(prod_live prod_total_value)

// Output value divided by cultivated land: crops, livestock, combined; then log of combined.
generate yieldpa = prod_total_value / land_culti_total
generate yieldpa_live = prod_live / land_culti_total
generate yieldpa_comb = prod_comb / land_culti_total
generate lyieldpa_comb = ln(yieldpa_comb)


// Output-to-input ratio for the combined sector, and its log.
generate io_ratio_total_comb = yieldpa_comb / input_costpa_total_comb
generate lio_ratio_total_comb = ln(io_ratio_total_comb)




// 3. (disabled) keep if the person works in agriculture, livestock, or is a family worker in agriculture
//keep if idc==idc101 | idc==idc195 | family_worker!= . 
// 4. Collapsing the data to household heads: every observation whose s1aq02
//    is not 1 (including missing) is removed.

drop if s1aq02 != 1

// 5. Summarising the data by farm size: each <name>_size variable holds the
//    mean of <name> within the observation's farm_size_dummy class.
foreach v in labor_costpa_total labor_costpa_total_comb input_costpa_total input_costpa_total_comb {
    egen `v'_size = mean(`v'), by(farm_size_dummy)
}
// These two result names are spelled "yielpa"; the spelling is kept unchanged.
egen yielpa_live_size = mean(yieldpa_live), by(farm_size_dummy)
egen yielpa_comb_size = mean(yieldpa_comb), by(farm_size_dummy)
// NOTE(review): io_ratio_total is not created in the visible part of this
// script -- confirm it exists upstream. With variable abbreviation enabled
// Stata may otherwise resolve the name to io_ratio_total_comb.
foreach v in yieldpa io_ratio_total io_ratio_total_comb {
    egen `v'_size = mean(`v'), by(farm_size_dummy)
}

//collapse (mean) family_size family_worker family_worker_mean    input_costpa_total_size input_costpa_total_comb_size yielpa_live_size yielpa_comb_size io_ratio_total_size io_ratio_total_comb_size,by(farm_size_dummy)


// Estimates of reported price bias: implied unit price of each crop group
// (value / quantity), then the mean implied price within each farm-size class.
foreach crop in wheat cotton sugar rice maize pulses fruits vegit fodder other by {
    gen `crop'_price = prod_`crop'_value / prod_`crop'_qty
}

foreach crop in wheat cotton sugar rice maize pulses fruits vegit fodder other by {
    egen `crop'_meanp = mean(`crop'_price), by(farm_size_dummy)
}
 
 
 
// Correction for self-reported price bias: recode system-missing quantities to zero.
// NOTE(review): prod_fruits_qty is not in this list although a fruits price is
// computed elsewhere in the script -- confirm whether the omission is intentional.
foreach crop in wheat cotton sugar rice maize pulses vegit fodder other by {
    replace prod_`crop'_qty = 0 if prod_`crop'_qty == .
}
 
 // Correcting values for wrong weight measurement, i.e. reporting 40 kg instead of the actual 1 kg weight, for all farms that mis-reported their weights.
// Correcting for price bias by revaluing output whose implied price is too high.

// Start from the reported values.
gen prod_wheat_value3 = prod_wheat_value
gen prod_cotton_value3 = prod_cotton_value
gen prod_sugar_value3 = prod_sugar_value
gen prod_rice_value3 = prod_rice_value
gen prod_maize_value3 = prod_maize_value
gen prod_fodder_value3 = prod_fodder_value

// Where the implied price exceeds the crop's threshold, revalue the implied
// quantity (value / price) at a fixed price.
// The "& price < ." guard is required: Stata orders missing values above every
// number, so "price > threshold" alone is also true when the price is missing
// (quantity missing or zero). Without the guard such rows were set to missing
// here and then to 0 below, discarding a reported production value.
replace prod_wheat_value3 = (prod_wheat_value/wheat_price)*1300 if wheat_price>1500 & wheat_price<.
replace prod_cotton_value3 = (prod_cotton_value/cotton_price)*3000 if cotton_price>4150 & cotton_price<.
replace prod_sugar_value3 = (prod_sugar_value/sugar_price)*180 if sugar_price>400 & sugar_price<.
replace prod_rice_value3 = (prod_rice_value/rice_price)*1545 if rice_price>2999 & rice_price<.
replace prod_maize_value3 = (prod_maize_value/maize_price)*1192 if maize_price>1250 & maize_price<.
replace prod_fodder_value3 = (prod_fodder_value/fodder_price)*700 if fodder_price>1000 & fodder_price<.


// Crops with no reported value count as zero.
replace prod_wheat_value3 = 0 if prod_wheat_value3==.
replace prod_cotton_value3 = 0 if prod_cotton_value3==.
replace prod_sugar_value3 = 0 if prod_sugar_value3==.
replace prod_rice_value3 = 0 if prod_rice_value3==.
replace prod_maize_value3 = 0 if prod_maize_value3==.
replace prod_fodder_value3 = 0 if prod_fodder_value3==.
 

// Estimates of yield and TFP after the price adjustment.
// Adjusted value of total crop output: the six price-corrected crops plus the
// remaining groups at their reported values (missing components count as zero).
egen prod_total_value3 = rowtotal(prod_wheat_value3 prod_cotton_value3 prod_sugar_value3 prod_rice_value3 prod_maize_value3 prod_fodder_value3 prod_pulses_value prod_fruits_value prod_vegit_value prod_other_value prod_by_value)
egen prod_comb3 = rowtotal(prod_live prod_total_value3)

// Per-acre combined yield, output-to-input ratio, and their logs.
generate yieldpa_comb3 = prod_comb3 / land_culti_total
generate io_ratio_total_comb3 = yieldpa_comb3 / input_costpa_total_comb
generate lyieldpa_comb3 = ln(yieldpa_comb3)
//gen io_ratio_total_comb3= yieldpa_comb3/input_costpa_total_comb
generate lio_ratio_total_comb3 = ln(io_ratio_total_comb3)


// This part only incorporates land rent into total cost and generates the corresponding values.

// Reported rent per unit of farm land, for rows with landri_yn==1 and
// landown_yn==2, and its mean within each farm-size class.
gen landri_rent_pa = landri_rent / farmland_total if landown_yn==2 & landri_yn==1
egen landri_rent_pa_size = mean(landri_rent_pa), by(farm_size_dummy)

// Rent on cultivated land at a fixed per-unit rate for each size class
// (presumably the class means computed above -- TODO confirm the source of
// the constants). Rows with a missing size class stay missing.
gen landri_rent_total = land_culti_total * cond(farm_size_dummy==1, 35072, cond(farm_size_dummy==2, 29188, 30276)) if inlist(farm_size_dummy, 1, 2, 3)
gen landri_rentpa_total = landri_rent_total / land_culti_total
gen land_rent_cost = landri_rentpa_total * farmland_total

// Output-to-input ratios with per-acre rent added to the cost side.
gen io_ratio_total_combr = yieldpa_comb / (input_costpa_total_comb + landri_rentpa_total)
// NOTE(review): input_cost_comb_imput_pa is not created in the visible part of
// this script -- confirm it exists upstream.
gen io_ratio_total_combr_imput = yieldpa_comb / (input_cost_comb_imput_pa + landri_rentpa_total)
gen lio_ratio_total_combr = ln(io_ratio_total_combr)


// Variable construction.

// Log of cultivated land.
generate lland_culti = ln(land_culti_total)

// Family workers per unit of cultivated land (missing set to zero),
// its ln(x + 1) transform, and its mean by farm-size class.
generate family_worker_pa = family_worker_mean / land_culti_total
replace family_worker_pa = 0 if family_worker_pa == .
generate family_worker_pa1 = 1 + family_worker_pa
generate lfamily_worker_pa = ln(family_worker_pa1)
egen family_worker_pa_size = mean(family_worker_pa), by(farm_size_dummy)

// ln(rootwater_mean + 1).
generate rootwater_mean1 = 1 + rootwater_mean
generate lrootwater_mean = ln(rootwater_mean1)

// Plain logs; each result is named "l" + the source variable name.
foreach v in DEM_mean ROUGH_mean input_cost_total_comb input_costpa_total_comb {
    generate l`v' = ln(`v')
}
//gen lrootwater_mean = ln(rootwater_mean)
